{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# GPU: 32*40 in 8.00s = 160/s\n",
    "# CPU: 32*8 in 115.0s = 2/s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# After installing and starting Julia run the following to install the required packages:\n",
    "# Pkg.init(); Pkg.update()\n",
    "# for p in (\"CUDAapi\",\"CUDAdrv\",\"MAT\",\"Images\",\"IJulia\",\"Knet\"); Pkg.add(p); end\n",
    "# Pkg.checkout(\"Knet\",\"ilkarman\") # make sure we have the right Knet version\n",
    "# Pkg.build(\"Knet\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OS: Linux\n",
      "Julia: 0.6.1\n",
      "Knet: 0.8.5+\n"
     ]
    }
   ],
   "source": [
    "using Knet\n",
    "include(Knet.dir(\"examples\",\"resnet\", \"resnetlib.jl\"))\n",
    "using ResNetLib: resnet50init, resnet50\n",
    "println(\"OS: \", Sys.KERNEL)\n",
    "println(\"Julia: \", VERSION)\n",
    "println(\"Knet: \", Pkg.installed(\"Knet\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "6\n"
     ]
    }
   ],
   "source": [
    ";cat /proc/cpuinfo '|' grep processor '|' wc -l"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "name\n",
      "Tesla K80\n"
     ]
    }
   ],
   "source": [
    ";nvidia-smi --query-gpu=gpu_name --format=csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "const BATCH_SIZE = 32\n",
    "const RESNET_FEATURES = 2048\n",
    "const BATCHES_GPU = 40\n",
    "const BATCHES_CPU = 8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "fakedata (generic function with 1 method)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Create batches of fake data\n",
    "function fakedata(batches; atype=KnetArray)\n",
    "    x = rand(Float32, 224, 224, 3, BATCH_SIZE * batches)\n",
    "    minibatch(x, BATCH_SIZE, xtype=atype)\n",
    "end"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "predictfn (generic function with 1 method)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Return features from classifier\n",
    "function predictfn(weights, moments, data)\n",
    "    out = []\n",
    "    for x in data\n",
    "        pred = resnet50(weights, moments, x; stage=5)\n",
    "        push!(out, mat(pred))\n",
    "    end\n",
    "    return Array(hcat(out...))\n",
    "end"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. GPU"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mLoading pretrained weights...\n",
      "\u001b[39m\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mLoading imagenet-resnet-50-dag.mat...\n",
      "\u001b[39m"
     ]
    }
   ],
   "source": [
    "# Initialize resnet weights and fake data\n",
    "gpuweights = gpumoments = nothing; knetgc() # clear memory from previous run\n",
    "gpuweights, gpumoments = resnet50init(;stage=5, trained=true, atype=KnetArray);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mCold start\n",
      "\u001b[39m"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 21.220333 seconds (1.93 M allocations: 842.832 MiB, 35.05% gc time)\n"
     ]
    }
   ],
   "source": [
    "info(\"Cold start\")\n",
    "gpudata1 = fakedata(BATCHES_GPU, atype=KnetArray)\n",
    "@time predictfn(gpuweights, gpumoments, gpudata1);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mBenchmarking\n",
      "\u001b[39m"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  8.002292 seconds (360.61 k allocations: 760.376 MiB, 3.82% gc time)\n"
     ]
    }
   ],
   "source": [
    "info(\"Benchmarking\")\n",
    "gpudata = fakedata(BATCHES_GPU, atype=KnetArray)\n",
    "@time predictfn(gpuweights, gpumoments, gpudata);"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. CPU"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mLoading pretrained weights...\n",
      "\u001b[39m"
     ]
    }
   ],
   "source": [
    "# Initialize resnet weights\n",
    "cpuweights, cpumoments = resnet50init(;stage=5, trained=true, atype=Array);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mCold start\n",
      "\u001b[39m"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 25.160136 seconds (14.20 M allocations: 4.351 GiB, 10.91% gc time)\n"
     ]
    }
   ],
   "source": [
    "info(\"Cold start\")\n",
    "cpudata1 = fakedata(1, atype=Array);\n",
    "@time predictfn(cpuweights, cpumoments, cpudata1);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mBenchmarking\n",
      "\u001b[39m"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "115.024997 seconds (174.89 k allocations: 30.150 GiB, 15.85% gc time)\n"
     ]
    }
   ],
   "source": [
    "info(\"Benchmarking\")\n",
    "cpudata = fakedata(BATCHES_CPU, atype=Array);\n",
    "@time predictfn(cpuweights, cpumoments, cpudata);"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Julia 0.6.1",
   "language": "julia",
   "name": "julia-0.6"
  },
  "language_info": {
   "file_extension": ".jl",
   "mimetype": "application/julia",
   "name": "julia",
   "version": "0.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
